/*=======================================================================
Creator: Jingyuan Wang, jingyuanwang@u.northwestern.edu
Date created: 				08/23/2018
Date last modified: 		03/18/2019
Purpose: 
		Use QCEW data and EIA & eGRID carbon intensities data to generate 
		industry-year level carbon intensities
		
		Steps:
			I. generate industry-year level fuel usage share
				1. time-variant (with holes)
				2. time-invariant
			II generate electricity carbon intensites   
				1. use the QCEW data as the weight
				2. take an average across state to get a industry-year-national intensities
			III. combined intensities industry-year level fuel usage share
				1. interpolate time-variant fuel share (fill holes)
				2. generate carbon intensities
			IV. save
==========================================================================*/

*************************************************************************
* 				PART II. Generate electricity carbon intensities		*
*************************************************************************
use "$energy_merged/QCEW_SEDS_MECS_CI_state_ind_year.dta", clear

*1. keep only naics6 level observations
keep if industry_level == 6  | NAICS_code == 31131 | NAICS_code == 31181

*2. keep only naics6-state level observations

* use state-level employment if state add up to 60% of US employment
preserve
	keep state_code state state_name industry_code NAICS_code sector  year total_annual_wages annual_avg_emplvl
	egen group = group(state_code)
	
	keep annual_avg_emplvl total_annual_wages year NAICS_code group
	reshape wide annual_avg_emplvl total_annual_wages, i(NAICS_code year) j(group)
	
	forvalues i = 1(1)52{
		replace total_annual_wages`i' = 0 if total_annual_wages`i' == .
		replace annual_avg_emplvl`i' = 0 if annual_avg_emplvl`i' == .
	}
	*
	
	gen wage_tot = 0
	gen employment_tot = 0
	forvalues i = 2(1)52{
		replace wage_tot = wage_tot + total_annual_wages`i' 
		replace employment_tot = employment_tot +  annual_avg_emplvl`i'
	}
	*
	
	gen rate_w = wage_tot/total_annual_wages1
	gen rate_e = employment_tot/annual_avg_emplvl1
	
	bysort NAICS_code: egen rate_w_tiv = mean(rate_w)
	bysort NAICS_code: egen rate_e_tiv = mean(rate_e)
	
	bysort NAICS_code: egen rate_e_min = min(rate_e)

	gen national = 0
	replace national = 1 if rate_e_tiv < 0.5 |  rate_e_min < 0.4
	keep national NAICS_code year annual_avg_emplvl1
	rename annual_avg_emplvl1 annual_avg_emplvl_us
	label var annual_avg_emplvl_us "industry-us-year. Annual average of monthly employment levels"
	tempfile nationalemployment
	save `nationalemployment.dta', replace
	
restore

merge m:1 NAICS_code year using `nationalemployment.dta'
drop _merge

keep if state != "US" | NAICS_code == 331411 | NAICS_code == 316212 | national == 1
* industry 331411 does not have state level employment
drop if NAICS_code == 316212 & state == "NY"
* industry 316212: 2009-2010 US + 2001 NY
drop if national == 1 & state != "US"


*2. weighted average and collapse to national level
gen CI_elec_weightedsum = CI_elec_CO2 * annual_avg_emplvl
gen price_elec_weightedsum = price_elec * annual_avg_emplvl


keep NAICS_code sector year naics_mecs fuelshare_ISnaics6level ///
		CI_elec_weightedsum price_elec_weightedsum annual_avg_emplvl CI_RF CI_PE CI_PC CI_NG CI_HL CI_DF CI_CL annual_avg_emplvl_us
collapse (first) annual_avg_emplvl_us (sum) CI_elec_weightedsum price_elec_weightedsum annual_avg_emplvl  ///
			(mean) CI_RF CI_PE CI_PC CI_NG CI_HL CI_DF CI_CL ///
			, by(year NAICS_code sector naics_mecs fuelshare_ISnaics6level)
	
gen CI_elec = CI_elec_weightedsum/annual_avg_emplvl
gen price_elec = price_elec_weightedsum / annual_avg_emplvl
label var price_elec "price in $ per million Btu"

* adjust meansurement unit: lb/mwh to kg/mmBtu
replace CI_elec = CI_elec * 0.13293
label var CI_elec "electricity carbon intensities, Kg per million Btu"

*3. save 
drop CI_elec_weightedsum

foreach v in "NG" "PC" {
	label var CI_`v' "[constant] industrial use intenties, Kilograms CO2 / Million Btu"
}
*
foreach v in "CL" "DF" "HL" {
	label var CI_`v' "[constant] home use intensities , Kilograms CO2 / Million Btu"
}
*
order CI_elec, b(CI_RF)
tempfile carbonintensities
save `carbonintensities.dta', replace

*************************************************************************
* 				PART II. fuel share from MECS							*
*************************************************************************
preserve
	tempfile MECS_share
	
	do "$buildpath/code/subfunctions/gen_fuelshare_MECS.do"
	
	save `MECS_share.dta', replace	
	
	* save a list of available NAICS6 industries
	keep naics_mecs 
	keep if naics_mecs > 999
	duplicates drop
	tempfile mecscode
	save `mecscode.dta', replace
restore

*************************************************************************
* 				PART III. Combine the intensities with fuel share		*
*************************************************************************

* I merge dataset ****************************************************
* 1. set up the panel:
do "$buildpath/code/subfunctions/genPanel_1990-2016.do" 

* generate contemporary code
gen NAICS_code = naics2007
replace NAICS_code = naics2012 if year>=2011

* 2. merge in carbon intensities
merge m:1 NAICS_code year using `carbonintensities.dta'
drop if _merge == 2
* 339111 is 2002 NAICS code, it corresponds to 339114 or 334516 in 2007 code. The employment values of this industry is small. So I just drop it.
drop _merge


* 3. merge in fuel share
replace fuelshare_ISnaics6level = 0 if naics_mecs == 324122 | naics_mecs == 331110
replace naics_mecs = 324 if naics_mecs  == 324122
replace naics_mecs = 331 if naics_mecs  == 331110

merge m:1 naics_mecs year using `MECS_share.dta'
drop if _merge == 2
drop _merge

* II generate variables ****************************************************
* 1. generate carbon intensities
* (1) time-variant
gen CarbonI_TV = 0
gen totalshare = 0
foreach v in "elec" "DF" "NG" "HL" "PC" "CL"{
	replace CarbonI_TV = CarbonI_TV + CI_`v' * share_`v'
	replace totalshare = totalshare + share_`v'
}
*
replace CarbonI_TV = CarbonI_TV/totalshare


* (2) time-invariant
gen CarbonI_TINV = 0
gen nonmissingshare_TINV = 0
foreach v in "elec" "DF" "NG" "HL" "PC" "CL" {
	replace CarbonI_TINV = CarbonI_TINV + CI_`v' * share_`v'_inv
	replace nonmissingshare_TINV = nonmissingshare_TINV + share_`v'_inv
}
*
replace CarbonI_TINV = CarbonI_TINV/nonmissingshare_TINV
* but there are still within industry variations from the time-inconsistent industry code (the CI_`fuel' variable)
bysort naics2007 naics2012: egen aveCarbon = mean(CarbonI_TINV)

label var CarbonI_TV "time-variant carbon intensities, Kg per million Btu"
label var CarbonI_TINV "time-invariant carbon intensities, Kg per million Btu"

*************************************************************************
* 				PART IV. save											*
*************************************************************************

* 1. keep variables
keep census_naics naics2012 naics2007 industryname2012 industryname2007 NAICS_code sector year naics_mecs fuelshare_ISnaics6level CarbonI_TV CarbonI_TINV annual_avg_emplvl annual_avg_emplvl_us
rename NAICS_code naics
label var naics "1990-2011: 2007 code; 2012+: 2012 code"

* 2. generate a intensity [time-invariant fuel share] at NAICS2012 level
preserve
	keep if year <= 2011
	keep naics2012 year CarbonI_TINV CarbonI_TV annual_avg_emplvl annual_avg_emplvl_us
	replace CarbonI_TINV      	= CarbonI_TINV * annual_avg_emplvl
	replace CarbonI_TV			= CarbonI_TV * annual_avg_emplvl
	collapse (sum) CarbonI_TINV CarbonI_TV  annual_avg_emplvl annual_avg_emplvl_us, by(naics2012 year)
	replace CarbonI_TINV 		= CarbonI_TINV/annual_avg_emplvl
	replace CarbonI_TV 			= CarbonI_TV/annual_avg_emplvl
	
	rename CarbonI_TINV CarbonI_TINV_2012NAICS
	label var CarbonI_TINV_2012NAICS "time-invariant carbon intensities for NAICS2012, Kg per million Btu"
	rename CarbonI_TV CarbonI_TV_2012NAICS
	label var CarbonI_TV_2012NAICS "time-variant carbon intensities for NAICS2012, Kg per million Btu"
	rename annual_avg_emplvl annual_avg_emplvl_2012NAICS
	label var annual_avg_emplvl_2012NAICS "collapsed state annual_avg_emplvl to NAICS2012 level employment"
	rename annual_avg_emplvl_us annual_avg_emplvl_us_2012NAICS
	label var annual_avg_emplvl_us_2012NAICS "national level annual_avg_emplvl to NAICS2012 level employment"
	
	tempfile pre2012intensities
	save `pre2012intensities.dta', replace
restore

merge m:1 naics2012 year using `pre2012intensities.dta'
drop _merge

replace CarbonI_TINV_2012NAICS 	= CarbonI_TINV if year >= 2012
replace CarbonI_TV_2012NAICS 	= CarbonI_TV if year >= 2012
replace annual_avg_emplvl_2012NAICS = annual_avg_emplvl if year >= 2012
replace annual_avg_emplvl_us_2012NAICS = annual_avg_emplvl_us if year >= 2012

save "$EI/emissionintensities_industry_year.dta", replace
